Content
- The most popular content types by clicks are the forum, homepage, outside content, and quizzes.
library(tidyverse)
library(ggplot2)
library(patchwork)
# Load the cleaned feature table that every plot and summary below reads.
# The path is relative, so it resolves against the current working directory.
feat <- read.csv(file = "../../data/clean/features.csv")
This data describes student performance in online learning through The Open University, a university based in the United Kingdom. Courses are delivered in a structured fashion entirely online, and students are assigned small tutoring groups to support their learning.
The visualizations here describe the relationships between some characteristics of students and courses, as well as the scores achieved.
Scores are assigned in three different ways: TMA (tutor-marked), CMA (computer-marked), and “Exam” which is the final exam for a course. Different courses are called modules, and are labeled with letter codes here such as AAA or GGG in the column code_module. Courses are offered at different times of year as well, and this is shown in code_presentation.
Student interaction with the online course material is described by how many times they click on a specific piece of content. For example, sum_clicksum_glossary is the total number of times the student clicked the glossary for the course in question.
Student characteristics offered include gender, disability status, highest formal education level achieved, age group (very broad), deprivation/poverty level (IMD Band), and region of UK where they reside.
Some features were recoded in a Python script.
The id_assessment column probably has internal correlations that would lend themselves to grouping.
# Number of rows in `feat` per course module, with each count printed above
# its bar.
ggplot(feat, aes(x = as.factor(code_module))) +
  theme_bw() +
  # geom_bar() is the layer meant for categorical counts; it replaces
  # geom_histogram(stat = "count"), which warns about ignored binning
  # parameters.
  geom_bar() +
  # after_stat(count) replaces the deprecated `..count..` notation.
  geom_text(stat = "count", aes(label = after_stat(count)), nudge_y = 1500)
Semester
# Number of rows in `feat` per presentation code, with each count printed
# above its bar.
ggplot(feat, aes(x = as.factor(code_presentation))) +
  theme_bw() +
  # geom_bar() is the layer meant for categorical counts; it replaces
  # geom_histogram(stat = "count"), which warns about ignored binning
  # parameters.
  geom_bar() +
  # after_stat(count) replaces the deprecated `..count..` notation.
  geom_text(stat = "count", aes(label = after_stat(count)), nudge_y = 1500)
Type of grading
# Number of rows in `feat` per assessment type, with each count printed above
# its bar.
ggplot(feat, aes(x = as.factor(assessment_type))) +
  theme_bw() +
  # geom_bar() is the layer meant for counting rows per x value; it replaces
  # geom_histogram(stat = "count"), which warns about ignored binning
  # parameters.
  geom_bar() +
  # after_stat(count) replaces the deprecated `..count..` notation.
  geom_text(stat = "count", aes(label = after_stat(count)), nudge_y = 1500)

# Number of rows per distinct value of num_of_prev_attempts. The labels are
# nudged further up (5000) than in the other count plots.
ggplot(feat, aes(x = num_of_prev_attempts)) +
  theme_bw() +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), nudge_y = 5000)
# Density of the log-transformed number of previous attempts, one curve per
# course module, with one facet row per assessment type.
#
# log1p(x) = log(1 + x) is used instead of log(x): log(0) is -Inf, so rows with
# zero previous attempts would be dropped as non-finite and the plot would only
# show students who had already attempted the module at least once.
ggplot(
  feat,
  aes(
    x = log1p(num_of_prev_attempts),
    group = as.factor(code_module),
    fill = as.factor(code_module)
  )
) +
  facet_grid(assessment_type ~ .) +
  theme_bw() +
  geom_density(alpha = .5)
# Overall density of studied_credits with a vertical reference line at x = 75.
ggplot(data = feat, mapping = aes(x = studied_credits)) +
  geom_density() +
  geom_vline(xintercept = 75) +
  theme_bw()

# Same variable, one translucent density per course module and one facet row
# per assessment type. The reference line is added before the densities, so it
# is drawn underneath them.
ggplot(
  data = feat,
  mapping = aes(
    x = studied_credits,
    group = as.factor(code_module),
    fill = as.factor(code_module)
  )
) +
  geom_vline(xintercept = 75) +
  geom_density(alpha = 0.5) +
  facet_grid(assessment_type ~ .) +
  theme_bw()
The date values indicate the number of days since the module was taught.
# Overall density of the `date` column.
ggplot(data = feat, mapping = aes(x = date)) +
  geom_density() +
  theme_bw()

# Density of `date`, one translucent curve per assessment type.
ggplot(
  data = feat,
  mapping = aes(
    x = date,
    group = as.factor(assessment_type),
    fill = as.factor(assessment_type)
  )
) +
  geom_density(alpha = 0.5) +
  theme_bw()

# Density of `date`, one curve per course module and one facet row per
# assessment type.
ggplot(
  data = feat,
  mapping = aes(
    x = date,
    group = as.factor(code_module),
    fill = as.factor(code_module)
  )
) +
  geom_density(alpha = 0.5) +
  facet_grid(assessment_type ~ .) +
  theme_bw()
# Summary statistics (via skimr) for the twenty per-content-type click-total
# columns.
skimr::skim(feat[, c(
  "sum_clicksum_dataplus",
  "sum_clicksum_dualpane",
  "sum_clicksum_externalquiz",
  "sum_clicksum_folder",
  "sum_clicksum_forumng",
  "sum_clicksum_glossary",
  "sum_clicksum_homepage",
  "sum_clicksum_htmlactivity",
  "sum_clicksum_oucollaborate",
  "sum_clicksum_oucontent",
  "sum_clicksum_ouelluminate",
  "sum_clicksum_ouwiki",
  "sum_clicksum_page",
  "sum_clicksum_questionnaire",
  "sum_clicksum_quiz",
  "sum_clicksum_repeatactivity",
  "sum_clicksum_resource",
  "sum_clicksum_sharedsubpage",
  "sum_clicksum_subpage",
  "sum_clicksum_url"
)])
| Name | …[] |
| Number of rows | 190508 |
| Number of columns | 20 |
| _______________________ | |
| Column type frequency: | |
| numeric | 20 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| sum_clicksum_dataplus | 0 | 1 | 2.98 | 9.54 | 0 | 0 | 0 | 0 | 143 | ▇▁▁▁▁ |
| sum_clicksum_dualpane | 0 | 1 | 0.97 | 3.51 | 0 | 0 | 0 | 0 | 68 | ▇▁▁▁▁ |
| sum_clicksum_externalquiz | 0 | 1 | 3.18 | 10.79 | 0 | 0 | 0 | 0 | 340 | ▇▁▁▁▁ |
| sum_clicksum_folder | 0 | 1 | 0.33 | 1.12 | 0 | 0 | 0 | 0 | 13 | ▇▁▁▁▁ |
| sum_clicksum_forumng | 0 | 1 | 380.95 | 704.88 | 0 | 57 | 178 | 422 | 13154 | ▇▁▁▁▁ |
| sum_clicksum_glossary | 0 | 1 | 3.48 | 39.14 | 0 | 0 | 0 | 0 | 2952 | ▇▁▁▁▁ |
| sum_clicksum_homepage | 0 | 1 | 330.82 | 370.80 | 0 | 119 | 226 | 416 | 7277 | ▇▁▁▁▁ |
| sum_clicksum_htmlactivity | 0 | 1 | 0.44 | 1.76 | 0 | 0 | 0 | 0 | 33 | ▇▁▁▁▁ |
| sum_clicksum_oucollaborate | 0 | 1 | 4.39 | 12.17 | 0 | 0 | 0 | 3 | 316 | ▇▁▁▁▁ |
| sum_clicksum_oucontent | 0 | 1 | 554.06 | 811.90 | 0 | 37 | 205 | 761 | 9308 | ▇▁▁▁▁ |
| sum_clicksum_ouelluminate | 0 | 1 | 2.48 | 11.03 | 0 | 0 | 0 | 0 | 317 | ▇▁▁▁▁ |
| sum_clicksum_ouwiki | 0 | 1 | 33.69 | 82.39 | 0 | 0 | 0 | 32 | 2117 | ▇▁▁▁▁ |
| sum_clicksum_page | 0 | 1 | 3.33 | 7.24 | 0 | 0 | 0 | 4 | 334 | ▇▁▁▁▁ |
| sum_clicksum_questionnaire | 0 | 1 | 3.82 | 9.36 | 0 | 0 | 0 | 0 | 89 | ▇▁▁▁▁ |
| sum_clicksum_quiz | 0 | 1 | 376.21 | 565.80 | 0 | 30 | 119 | 615 | 13032 | ▇▁▁▁▁ |
| sum_clicksum_repeatactivity | 0 | 1 | 0.00 | 0.03 | 0 | 0 | 0 | 0 | 3 | ▇▁▁▁▁ |
| sum_clicksum_resource | 0 | 1 | 49.55 | 76.08 | 0 | 18 | 34 | 62 | 5147 | ▇▁▁▁▁ |
| sum_clicksum_sharedsubpage | 0 | 1 | 0.00 | 0.07 | 0 | 0 | 0 | 0 | 6 | ▇▁▁▁▁ |
| sum_clicksum_subpage | 0 | 1 | 173.68 | 194.10 | 0 | 36 | 108 | 252 | 4345 | ▇▁▁▁▁ |
| sum_clicksum_url | 0 | 1 | 26.42 | 43.15 | 0 | 4 | 14 | 35 | 2134 | ▇▁▁▁▁ |
Quick test of the high-clicking outliers
# Interactive table of the rows whose quiz click total exceeds 10000.
DT::datatable(feat[feat[["sum_clicksum_quiz"]] > 10000, ])
# Same check for the forum (forumng) click total.
DT::datatable(feat[feat[["sum_clicksum_forumng"]] > 10000, ])
# Row counts per level of each student characteristic, with each count printed
# above its bar. geom_bar() is the layer meant for categorical counts; it
# replaces geom_histogram(stat = "count"), which warns about ignored binning
# parameters. after_stat(count) replaces the deprecated `..count..` notation.

# Age band
ggplot(feat, aes(x = as.factor(age_band))) +
  theme_bw() +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), nudge_y = 1500)

# Gender
ggplot(feat, aes(x = as.factor(gender))) +
  theme_bw() +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), nudge_y = 1500)

# IMD band
ggplot(feat, aes(x = as.factor(imd_band))) +
  theme_bw() +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), nudge_y = 1500)

# Highest education level
ggplot(feat, aes(x = as.factor(highest_education))) +
  theme_bw() +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), nudge_y = 1500)

# Region; the x-axis labels are rotated 45 degrees.
ggplot(feat, aes(x = as.factor(region))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), nudge_y = 1500)
# Give imd_band an explicit level order so the x axis follows the order listed
# here.
# NOTE(review): "10-20" has no trailing "%", unlike the other levels —
# presumably this matches the spelling in the data; confirm, because any value
# not listed in `levels` becomes NA.
feat$imd_band <- factor(
  feat$imd_band,
  levels = c(
    "0-10%", "10-20", "20-30%", "30-40%", "40-50%",
    "50-60%", "60-70%", "70-80%", "80-90%", "90-100%"
  )
)

# Stacked row counts per IMD band, split by gender, with each segment's count
# printed at its vertical midpoint.
# `fill = gender` is written without the original parentheses, which would
# otherwise show up in the legend title as "(gender)".
ggplot(feat, aes(x = imd_band, group = gender, fill = gender)) +
  theme_bw() +
  # geom_bar() replaces geom_histogram(stat = "count"), which warns about
  # ignored binning parameters.
  geom_bar(alpha = .5) +
  # after_stat(count) replaces the deprecated `..count..` notation.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5)
  )
# Give imd_band an explicit level order so the stacked segments and legend
# follow the order listed here.
# NOTE(review): "10-20" has no trailing "%", unlike the other levels —
# presumably this matches the spelling in the data; confirm, because any value
# not listed in `levels` becomes NA.
feat$imd_band <- factor(
  feat$imd_band,
  levels = c(
    "0-10%", "10-20", "20-30%", "30-40%", "40-50%",
    "50-60%", "60-70%", "70-80%", "80-90%", "90-100%"
  )
)

# Stacked row counts per course module, split by IMD band, with each segment's
# count printed at its vertical midpoint.
# `fill = imd_band` is written without the original parentheses, which would
# otherwise show up in the legend title as "(imd_band)".
ggplot(feat, aes(x = code_module, group = imd_band, fill = imd_band)) +
  theme_bw() +
  # geom_bar() replaces geom_histogram(stat = "count"), which warns about
  # ignored binning parameters.
  geom_bar(alpha = .5) +
  # after_stat(count) replaces the deprecated `..count..` notation.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5)
  )
# Combined label "<code_presentation> <assessment_type>" for the x axis.
# test_plus is a scratch column that this file overwrites with other
# combinations elsewhere, so it is set here immediately before use.
feat$test_plus <- paste(feat$code_presentation, feat$assessment_type)

# Stacked row counts per presentation/assessment-type combination, split by
# gender, with each segment's count printed at its vertical midpoint.
# `fill = gender` is written without the original parentheses, which would
# otherwise show up in the legend title as "(gender)".
ggplot(feat, aes(x = test_plus, group = gender, fill = gender)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # geom_bar() replaces geom_histogram(stat = "count"), which warns about
  # ignored binning parameters.
  geom_bar(alpha = .5) +
  # after_stat(count) replaces the deprecated `..count..` notation.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5)
  )
# Combined label "<code_module> <assessment_type>" for the x axis.
# test_plus is a scratch column that this file overwrites with other
# combinations elsewhere, so it is set here immediately before use.
feat$test_plus <- paste(feat$code_module, feat$assessment_type)

# Stacked row counts per module/assessment-type combination, split by gender,
# with each segment's count printed at its vertical midpoint.
# `fill = gender` is written without the original parentheses, which would
# otherwise show up in the legend title as "(gender)".
ggplot(feat, aes(x = test_plus, group = gender, fill = gender)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # geom_bar() replaces geom_histogram(stat = "count"), which warns about
  # ignored binning parameters.
  geom_bar(alpha = .5) +
  # after_stat(count) replaces the deprecated `..count..` notation.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5)
  )
# Overall density of score.
ggplot(data = feat, mapping = aes(x = score)) +
  geom_density() +
  theme_bw()

# Score density, one translucent curve per assessment type.
ggplot(
  data = feat,
  mapping = aes(
    x = score,
    group = as.factor(assessment_type),
    fill = as.factor(assessment_type)
  )
) +
  geom_density(alpha = 0.5) +
  theme_bw()

# Score density per course module, one facet row per assessment type.
ggplot(
  data = feat,
  mapping = aes(
    x = score,
    group = as.factor(code_module),
    fill = as.factor(code_module)
  )
) +
  geom_density(alpha = 0.5) +
  facet_grid(assessment_type ~ .) +
  theme_bw()

# Score density per assessment type, one facet row per course module.
ggplot(
  data = feat,
  mapping = aes(
    x = score,
    group = as.factor(assessment_type),
    fill = as.factor(assessment_type)
  )
) +
  geom_density(alpha = 0.5) +
  facet_grid(code_module ~ .) +
  theme_bw()

# Score density, one translucent curve per gender.
ggplot(
  data = feat,
  mapping = aes(
    x = score,
    group = as.factor(gender),
    fill = as.factor(gender)
  )
) +
  geom_density(alpha = 0.5) +
  theme_bw()

# Score density per course module, one facet row per gender.
ggplot(
  data = feat,
  mapping = aes(
    x = score,
    group = as.factor(code_module),
    fill = as.factor(code_module)
  )
) +
  geom_density(alpha = 0.5) +
  facet_grid(gender ~ .) +
  theme_bw()

# Score density per gender, one facet row per course module.
ggplot(
  data = feat,
  mapping = aes(
    x = score,
    group = as.factor(gender),
    fill = as.factor(gender)
  )
) +
  geom_density(alpha = 0.5) +
  facet_grid(code_module ~ .) +
  theme_bw()

# Score histogram per gender, one facet row per course module; each facet gets
# its own y scale.
ggplot(
  data = feat,
  mapping = aes(
    x = score,
    group = as.factor(gender),
    fill = as.factor(gender)
  )
) +
  geom_histogram(alpha = 0.5) +
  facet_grid(code_module ~ ., scales = "free_y") +
  theme_bw()
Clearly not randomly missing
# Give imd_band an explicit level order so legends and facet rows follow the
# order listed here.
# NOTE(review): "10-20" has no trailing "%", unlike the other levels —
# presumably this matches the spelling in the data; confirm, because any value
# not listed in `levels` becomes NA.
feat$imd_band <- factor(
  feat$imd_band,
  levels = c(
    "0-10%", "10-20", "20-30%", "30-40%", "40-50%",
    "50-60%", "60-70%", "70-80%", "80-90%", "90-100%"
  )
)

# In the plots below the fill mappings are written without the original
# parentheses, which would otherwise show up in the legend titles
# (e.g. "(imd_band)").

# Score density, one translucent curve per IMD band.
ggplot(feat, aes(x = score, group = imd_band, fill = imd_band)) +
  theme_bw() +
  geom_density(alpha = .5)

# Score density per course module, one facet row per IMD band.
ggplot(feat, aes(x = score, group = code_module, fill = code_module)) +
  facet_grid(imd_band ~ .) +
  theme_bw() +
  geom_density(alpha = .5)

# Score density per IMD band, one facet row per course module.
ggplot(feat, aes(x = score, group = imd_band, fill = imd_band)) +
  facet_grid(code_module ~ .) +
  theme_bw() +
  geom_density(alpha = .5)

# Score histogram per IMD band, one facet row per course module; each facet
# gets its own y scale.
ggplot(feat, aes(x = score, group = imd_band, fill = imd_band)) +
  facet_grid(code_module ~ ., scales = "free_y") +
  theme_bw() +
  geom_histogram(alpha = .5)
Students retaking a module score lower on tests, suggesting that having to retake it reflects a poor command of the material.
# Indicator column: 1 when the row has at least one previous attempt, else 0.
feat[["prev_over_zero"]] <- ifelse(feat[["num_of_prev_attempts"]] > 0, 1, 0)

# Score density for rows with and without previous attempts, one facet row per
# assessment type.
ggplot(
  data = feat,
  mapping = aes(
    x = score,
    group = as.factor(prev_over_zero),
    fill = as.factor(prev_over_zero)
  )
) +
  geom_density(alpha = 0.5) +
  facet_grid(assessment_type ~ .) +
  theme_bw()

# Score density for each distinct number of previous attempts, each in its own
# facet row.
ggplot(
  data = feat,
  mapping = aes(
    x = score,
    group = as.factor(num_of_prev_attempts),
    fill = as.factor(num_of_prev_attempts)
  )
) +
  geom_density(alpha = 0.5) +
  facet_grid(num_of_prev_attempts ~ .) +
  theme_bw()
Taking more than 100 credits at a time is bad for overall performance.
# Indicator column: 1 when studied_credits is above 100, else 0.
feat[["cred_over_100"]] <- ifelse(feat[["studied_credits"]] > 100, 1, 0)

# Score density for rows above and at-or-below 100 credits, one facet row per
# assessment type.
ggplot(
  data = feat,
  mapping = aes(
    x = score,
    group = as.factor(cred_over_100),
    fill = as.factor(cred_over_100)
  )
) +
  geom_density(alpha = 0.5) +
  facet_grid(assessment_type ~ .) +
  theme_bw()
# test_plus is a scratch grouping column that is overwritten before each plot
# below, so each plot must run right after its own assignment.
# `fill = test_plus` is written without the original parentheses, which would
# otherwise show up in the legend title as "(test_plus)".

# Score density per presentation/assessment-type combination, one facet row per
# presentation.
feat$test_plus <- paste(feat$code_presentation, feat$assessment_type)
ggplot(feat, aes(x = score, group = test_plus, fill = test_plus)) +
  facet_grid(code_presentation ~ .) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_density(alpha = .5)

# Score density per module/assessment-type combination, one facet row per
# module.
feat$test_plus <- paste(feat$code_module, feat$assessment_type)
ggplot(feat, aes(x = score, group = test_plus, fill = test_plus)) +
  facet_grid(code_module ~ .) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_density(alpha = .5)

# Score density per module/presentation combination, one facet row per module.
feat$test_plus <- paste(feat$code_module, feat$code_presentation)
ggplot(feat, aes(x = score, group = test_plus, fill = test_plus)) +
  facet_grid(code_module ~ .) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_density(alpha = .5)